
**
** 1. Aggregation at Brand level (notation: defX - brand), quarterly
**

* Load the balanced UPC-by-quarter panel.
use "INTERMEDIATE/upc_quarter_actual-balanced.dta", clear

* NOTE: name_firm (name) and firm (numeric code) should be generated once GS1 data is obtained.

* Attach the brand descriptors; only UPC versions present in both files survive.
merge m:1 upc upc_ver using "INTERMEDIATE/products_clean_brand.dta", ///
	keepusing(brand_descr_main brand_attempt_main brand_descr_altern brand_attempt_altern) ///
	keep(match) nogenerate

count

* A product is one (gr1, firm, brand description, generic flag) cell.
* egen group() returns missing whenever any component is missing.
egen product = group(gr1 firm brand_descr generic)
drop if product==. | gr1==. | firm==. | brand==. | generic==.

* Collapse UPCs to product x quarter; numberUPCs counts the UPC rows behind each cell.
gen numberUPCs = 1
collapse (sum) revenue quantity numberUPCs num num_store multiple_versions, ///
	by(product year quarter department group module gr1 name_firm firm brand brand_descr generic flagHMSonly)
count

* Keep the incoming quarter variable as qtr; quarter becomes a consecutive
* period index over (year, qtr) and t is the Stata quarterly date.
rename quarter qtr
egen quarter = group(year qtr)
gen t = yq(year, qtr)
format t %tq

compress

* Longitudinal variables (product x quarter panel)

* Cohort / entry period, exit period, age and maximum age of each product.
* first_q and last_q are the first and last period index in which the product appears.
bysort product: egen first_q = min(quarter)
gen lcensored = (first_q==1)
gen cohort = first_q
bysort product: egen last_q = max(quarter)
quietly summarize last_q
local final_q = r(max)
gen rcensored = (last_q==`final_q')
gen exitTime = last_q
gen age = quarter - cohort
bysort product: egen maxAge = max(age)
bysort product: egen totalobs = count(quarter)

* Type of observation within the product's life
gen type = "NA"
replace type = "entry"      if age==0 & maxAge!=0
replace type = "continues"  if age>0 & age<maxAge
replace type = "exit"       if age!=0 & age==maxAge
replace type = "entry/exit" if age==0 & maxAge==0

* Censoring status of the product spell
gen censored = "NA"
replace censored = "left censored"                  if rcensored==0 & lcensored==1
replace censored = "right censored"                 if lcensored==0 & rcensored==1
replace censored = "left censored & right censored" if lcensored==1 & rcensored==1
replace censored = "not censored"                   if lcensored==0 & rcensored==0
tabulate type censored, missing

* A spell is complete when the product is observed in every period between entry and exit.
gen flag_longitudinal = "NA"
replace flag_longitudinal = "Complete"   if totalobs==last_q-first_q+1
replace flag_longitudinal = "Incomplete" if totalobs<last_q-first_q+1

* Real revenue: deflate by the quarterly CPI (unmatched CPI periods are discarded).
merge m:1 t year qtr using "$dir/BCindicators/cpi.dta", keep(master match)
gen revenue_r = revenue/cpi

* Product-level mean of real revenue over interior periods only
* (entry and exit periods are excluded), spread to every row of the product.
bysort product: egen rev_r_interior = mean(revenue_r) if age>0 & age<maxAge
bysort product: egen revenue_r_m = mean(rev_r_interior)
drop rev_r_interior


* Keep the cleaned variable set and store the full product panel.
keep product t year quarter qtr department group module gr1 name_firm firm brand brand_descr generic ///
	flagHMSonly multiple_versions num num_store numberUPCs ///
	cohort exitTime censored age maxAge type flag_longitudinal totalobs ///
	revenue quantity revenue_r revenue_r_m
save "INTERMEDIATE/product_RMS_defX_firm_sampleABCDE_06_15.dta", replace

* Rename exits: the exit event is recorded in the quarter AFTER the last
* quarter in which the product is observed. The last observed row of each
* product is copied, shifted one quarter forward and tagged "exit"; the
* original last row becomes "continues" (or "entry" for one-period products).
use "INTERMEDIATE/product_RMS_defX_firm_sampleABCDE_06_15.dta", clear
tempfile exits
keep if type=="exit" | type=="entry/exit"
keep  product year qtr quarter department group module gr1 name_firm firm brand brand_descr generic ///
cohort exitTime censored age maxAge type flag_longitudinal totalobs flagHMSonly multiple_versions revenue_r_m
* Move calendar time one quarter forward, rolling Q4 into Q1 of the next year.
replace qtr=qtr+1 
replace year=year+1 if qtr==5
replace qtr=1 if qtr==5
* Shift the period index kept from the panel. Rebuilding it with egen group()
* on this exit-only subset is correct only if every quarter contains an exit.
replace quarter=quarter+1
gen t=yq(year,qtr)
format %tq t
replace type="exit"
save `exits'
* Reload the panel from the same location it was saved to above
* (this previously read from $dirout, which is not where the file is written).
use "INTERMEDIATE/product_RMS_defX_firm_sampleABCDE_06_15.dta", clear
replace type="continues" if type=="exit"
replace type="entry" if type=="entry/exit" 
append using `exits'

* Unit price
gen price = revenue/quantity

* Quality proxies for each product
	* Baseline: log price relative to the module-quarter median price,
	* where the median is taken over continuing products only
	* (entrants and exits are excluded from the median).
		bysort quarter module: egen med_continuing = median(price) if type=="continues"
		bysort quarter module: egen price_median_tm = mean(med_continuing)
		drop med_continuing
		gen quality0 = ln(price/price_median_tm)
		label variable quality0 "proxy for quality of good j of category c in quarter t"
	* Regression-based (disabled)
		*merge m:1 product using "$dir/residual_quality/residual_quality_all.dta", keep (1 3) nogenerate
	* Percentile-based: rank of the product's price within module-quarter.
	* NOTE(review): rows with missing price sort last and are counted in _N - confirm this is intended.
		bysort quarter module (price): gen quality7 = 100*((_n-0.5)/_N)
		label variable quality7 "proxy for quality percentile with regards to "


** Sample A: the full product panel, no restrictions

save "INTERMEDIATE/product_RMS_defX_firm_sampleA_06_15.dta", replace

** Sample E (benchmark)

* Restrictions: complete spells, non-generic products, departments 8 and 9 excluded
	keep if flag_longitudinal=="Complete" & generic==0 & department!=8 & department!=9

save "INTERMEDIATE/product_RMS_defX_firm_sampleE_06_15.dta", replace




**
** 2. Aggregation at Barcode level, annual
**


* Load the balanced UPC-by-quarter panel.
use "INTERMEDIATE/upc_quarter_actual-balanced.dta", clear

* NOTE: name_firm (name) and firm (numeric code) should be generated once GS1 data is obtained.

* Attach the brand descriptors; only UPC versions present in both files survive.
merge m:1 upc upc_ver using "INTERMEDIATE/products_clean_brand.dta", ///
	keepusing(brand_descr_main brand_attempt_main brand_descr_altern brand_attempt_altern) ///
	keep(match) nogenerate

* Here the product is the barcode itself; collapse quarters to years
* (revenue and quantity are summed, num and num_store are averaged).
rename upc product
collapse (sum) revenue quantity (mean) num num_store, ///
	by(product year department group module name_firm firm brand brand_descr generic flagHMSonly multiple_versions)
count
drop if product==.
compress

* Defines cohort/entryTime, exitTime, age, and MaxAge (annual barcode panel)
bysort product: egen x=min(year)
* year is a calendar year here (not a 1..N period index as in the quarterly
* section), so a product is left censored when it is first seen in the first
* sample year. Comparing x to 1 would never be true.
qui su x
gen lcensored=(x==`r(min)')
gen cohort=x 
bysort product: egen y=max(year)
* Right censored: last seen in the final sample year.
qui su y
gen rcensored=(y==`r(max)') 
gen exitTime=y 
gen age=year-cohort 
bysort product: egen maxAge=max(age) 
bysort product: egen totalobs=count(year)

* Type of observation 
gen type="NA"
replace type="entry"      if age==0 & age!=maxAge  
replace type="continues"  if age>0  & age<maxAge  
replace type="exit"       if age==maxAge & age!=0 
replace type="entry/exit" if age==maxAge & age==0 
* Censoring status of the product spell
gen censored="NA"   
replace censored="left censored"   if lcensored==1   & rcensored==0
replace censored="right censored"  if rcensored==1 & lcensored==0
replace censored="left censored & right censored" if lcensored==1  & rcensored==1   
replace censored="not censored" if lcensored==0  & rcensored==0
tab type censored, m
* A spell is complete when the product is observed in every year between entry and exit.
gen flag_longitudinal="NA"
replace flag_longitudinal="Complete" if totalobs==y-x+1
replace flag_longitudinal="Incomplete" if totalobs<y-x+1

* Real revenue: deflate by the annual CPI (unmatched CPI years are discarded).
merge m:1 year using "INPUT/cpi.dta", keep(master match)
gen revenue_r = revenue/cpi

* Product-level mean of real revenue over interior years only,
* spread to every row of the product.
bysort product: egen rev_r_interior = mean(revenue_r) if age>0 & age<maxAge
bysort product: egen revenue_r_m = mean(rev_r_interior)
drop rev_r_interior

* Keep the cleaned variable set and store it as a scratch file.
keep product year department group module name_firm firm brand brand_descr generic ///
	flagHMSonly multiple_versions num num_store ///
	cohort censored age maxAge type flag_longitudinal totalobs ///
	revenue quantity revenue_r revenue_r_m
save "INTERMEDIATE/temp.dta", replace

* Exit rows: copy each product's last observed year, move it one year forward
* and tag it "exit"; these rows carry no revenue or quantity.
use "INTERMEDIATE/temp.dta", clear
tempfile exits
keep if inlist(type, "exit", "entry/exit")
keep product year department group module name_firm firm brand brand_descr generic ///
	censored age maxAge type flag_longitudinal totalobs flagHMSonly multiple_versions revenue_r_m
replace year = year + 1
replace type = "exit"
save `exits'

* In the panel itself the last observed year is no longer the exit event.
use "INTERMEDIATE/temp.dta", clear
replace type = "continues" if type=="exit"
replace type = "entry"     if type=="entry/exit"
append using `exits'

	* Sample E restrictions: complete spells, non-generic, departments 8 and 9 excluded
	keep if flag_longitudinal=="Complete" & generic==0 & department!=8 & department!=9
	gen price = revenue/quantity

		* Baseline quality: log price relative to the module-year median among continuing products
		bysort year module: egen med_continuing = median(price) if type=="continues"
		bysort year module: egen price_median_tm = mean(med_continuing)
		drop med_continuing
		gen quality0 = ln(price/price_median_tm)
		label variable quality0 "proxy for quality of good j of category c in year t" 
		* Percentile quality: rank of the product's price within module-year
		bysort year module (price): gen quality7 = 100*((_n-0.5)/_N)
		label variable quality7 "proxy for quality percentile with regards to "
	
save "INTERMEDIATE/product_RMS_def10_firm_sampleE_06_15_annual.dta", replace


***********
** 3. Aggregation at FIRM X GROUP level
***********

* NOTE(review): this input file is not written by any code visible in this section - confirm where it is produced.
use "INTERMEDIATE/product_RMS_def10_firm_SampleE_06_15.dta", clear
drop if firm==. | group==.

* Firm x group identifier and the number of distinct products / modules per cell-quarter
egen firmgroup = group(firm group)
bysort firmgroup quarter: egen products_ijt = nvals(product), miss
bysort firmgroup quarter: egen modules_ijt  = nvals(module), miss

* Indicators for more than one product / more than one module
gen multi_ijt        = (products_ijt>1)
gen multimodules_ijt = (modules_ijt>1)

* Birth period, age and maximum age of the firm-group
bysort firmgroup: egen birth_firm = min(quarter)
gen firm_age = quarter - birth_firm
bysort firmgroup: egen max_Age_firm = max(firm_age)

* Number of entering products
gen entry = (type=="entry")
bysort firmgroup quarter: egen N_ijt = sum(entry)

* Number of exiting products
gen exit = (type=="exit")
bysort firmgroup quarter: egen X_ijt = sum(exit)

* Number of continuing products (neither entering nor exiting)
gen continue = (exit==0 & entry==0)
bysort firmgroup quarter: egen C_ijt = sum(continue)

* Total number of products as entrants plus continuers (T1)
gen T_ijt = N_ijt + C_ijt

* Change in the number of products, computed on one row per firm-group x quarter
* and merged back onto the product-level rows.
preserve

	tempfile last
	duplicates drop firmgroup quarter, force
	sort firmgroup quarter
	xtset firmgroup quarter
	* Product count on the previous row; the first period of a firm-group
	* (firm_age==0) has no predecessor and is set to zero.
	gen total_product_1_last = T_ijt[_n-1]
	replace total_product_1_last = 0 if firm_age==0
	gen delta_T_ijt  = T_ijt - total_product_1_last
	gen delta_T2_ijt = N_ijt - X_ijt

	* Cumulative net entry within the firm-group
	bysort firmgroup (quarter): gen T2_ijt = sum(delta_T2_ijt)
	drop *last

	save "`last'"
restore

sort firmgroup quarter
merge m:1 firmgroup quarter using "`last'", nogenerate

xtset product quarter

* Firm-group x quarter revenue: totals and per-product means, nominal and real
by firmgroup quarter, sort: egen rev_ijt        = sum(revenue)
by firmgroup quarter, sort: egen rev_r_ijt      = sum(revenue_r)
by firmgroup quarter, sort: egen rev_mean_ijt   = mean(revenue)
by firmgroup quarter, sort: egen rev_r_mean_ijt = mean(revenue_r)

* Revenue on the product's next row (next observed period)
sort product quarter
by product: gen revenue_next   = revenue[_n+1]
by product: gen revenue_r_next = revenue_r[_n+1]

* Revenue within the firm-group of entrants.
* Pattern used four times: mean over the entry==1 / entry==0 sub-cells, zero out
* the non-entrant rows, then take the cell maximum so that every row of the
* firm-group x quarter carries the entrants' mean (0 when there are none).
* The scratch variable is written out in full (total_revenue_entry_0) so the
* code does not depend on variable-name abbreviation.
bys firmgroup quarter entry: egen total_revenue_entry_0 = mean(revenue)
replace total_revenue_entry_0=0 if entry==0
bysort firmgroup quarter: egen revN_mean_ijt = max(total_revenue_entry_0)
drop total_revenue_entry_0
bys firmgroup quarter entry: egen total_revenue_entry_0 = mean(revenue_r)
replace total_revenue_entry_0=0 if entry==0
bysort firmgroup quarter: egen revN_r_mean_ijt = max(total_revenue_entry_0)
drop total_revenue_entry_0
xtset product quarter
* Same, using revenue on the next row (suffix F)
bys firmgroup quarter entry: egen total_revenue_entry_0 = mean(revenue_next)
replace total_revenue_entry_0=0 if entry==0
bysort firmgroup quarter: egen revN_meanF_ijt = max(total_revenue_entry_0)
drop total_revenue_entry_0
bys firmgroup quarter entry: egen total_revenue_entry_0 = mean(revenue_r_next)
replace total_revenue_entry_0=0 if entry==0
bysort firmgroup quarter: egen revN_r_meanF_ijt = max(total_revenue_entry_0)
drop total_revenue_entry_0

* Revenue within the firm-group of exits.
* Exit rows are valued with the product's revenue one and two rows back.
sort product quarter
by product: gen revenue_last   = revenue[_n-1]
by product: gen revenue_r_last = revenue_r[_n-1]
by product: gen revenue_2ago   = revenue[_n-2]
by product: gen revenue_r_2ago = revenue_r[_n-2]

* For each source variable: mean over the exit sub-cell, zero for non-exit rows,
* then the cell maximum spreads the exiters' mean to every row of the cell.
local exit_src revenue_last revenue_r_last revenue_2ago revenue_r_2ago
local exit_out revX_mean_ijt revX_r_mean_ijt revX_meanL_ijt revX_r_meanL_ijt
forvalues j = 1/4 {
	local src : word `j' of `exit_src'
	local out : word `j' of `exit_out'
	bysort firmgroup quarter exit: egen total_revenue_exit_0 = mean(`src')
	replace total_revenue_exit_0 = 0 if exit==0
	bysort firmgroup quarter: egen `out' = max(total_revenue_exit_0)
	drop total_revenue_exit_0
}

* Quality within the firm-group, all products:
* revenue-weighted mean (q_ijt), simple mean and median of quality0
bysort firmgroup quarter: egen temp1 = sum(revenue*quality0) if quality0!=.
bysort firmgroup quarter: egen temp2 = sum(revenue)          if quality0!=.
gen temp3 = temp1/temp2
bysort firmgroup quarter: egen q_ijt      = mean(temp3)
bysort firmgroup quarter: egen q_mean_ijt = mean(quality0)
bysort firmgroup quarter: egen q_p50_ijt  = median(quality0)
drop temp*

* Quality of entrants: revenue-weighted and simple mean over entry==1 rows
bysort firmgroup quarter: egen temp1 = sum(revenue*quality0) if entry==1 & quality0!=.
bysort firmgroup quarter: egen temp2 = sum(revenue)          if entry==1 & quality0!=.
gen temp3 = temp1/temp2
bysort firmgroup quarter: egen qN_ijt = mean(temp3)
bysort firmgroup quarter: egen temp4 = mean(quality0) if entry==1 & quality0!=.
bysort firmgroup quarter: egen qN_mean_ijt = mean(temp4)
drop temp*

* Quality of exits: uses quality and real revenue from the product's previous row
sort product quarter
by product: gen quality0_last = quality0[_n-1]

bysort firmgroup quarter: egen temp1 = sum(revenue_r_last*quality0_last) if exit==1 & quality0_last!=.
bysort firmgroup quarter: egen temp2 = sum(revenue_r_last)               if exit==1 & quality0_last!=.
gen temp3 = temp1/temp2
bysort firmgroup quarter: egen qX_ijt = mean(temp3)
bysort firmgroup quarter: egen temp4 = mean(quality0_last) if exit==1 & quality0_last!=.
bysort firmgroup quarter: egen qX_mean_ijt = mean(temp4)
drop temp*

* Main module of the firm-group and its revenue share:
* the main module is the one with the largest revenue in the cell
* (ties resolve to the largest module code via max()).
bysort firmgroup quarter module: egen revenue_module = sum(revenue)
bysort firmgroup quarter: egen revenue_module_max = max(revenue_module)
gen mainmodule0=module if revenue_module==revenue_module_max
bysort firmgroup quarter: egen mainmodule_ijt = max(mainmodule0)
drop mainmodule0
gen revmainmodule_ijt = revenue_module_max/rev_ijt

*HHI product: sum of squared product revenue shares within the cell.
* Variable names are written in full; the abbreviations revenue_share and
* share_sqrd become ambiguous once the module-level variables below exist.
gen revenue_share_product = revenue/rev_ijt
gen share_sqrd_product = revenue_share_product^2
bys firmgroup quarter: egen hhi_product_ijt = sum(share_sqrd_product)

*HHI module: sum of squared module revenue shares within the cell.
* Each module's squared share is repeated on mod_count product rows, so it is
* divided by mod_count before summing to count every module exactly once.
gen revenue_share_module = revenue_module/rev_ijt
gen share_sqrd_product_mod = revenue_share_module^2
bys firmgroup quarter module: egen mod_count = count(product)
bys firmgroup quarter: egen hhi_module_ijt = sum(share_sqrd_product_mod/mod_count)

* Revenue by product cohort within the firm-group x quarter, for cohorts 1 to 40:
*   rev_cK / rev_r_cK   : nominal / real revenue of products of cohort K
*   rev_pcK / rev_r_pcK : nominal / real revenue of products of cohorts after K
forvalues c = 1/40 {
	* products born in cohort c
	gen rev_sel = revenue if cohort==`c'
	bysort firmgroup quarter: egen rev_c`c'_ijt = sum(rev_sel)
	gen rev_r_sel = revenue_r if cohort==`c'
	bysort firmgroup quarter: egen rev_r_c`c'_ijt = sum(rev_r_sel)
	drop rev_sel rev_r_sel
	* products born after cohort c
	gen rev_sel = revenue if cohort>`c'
	bysort firmgroup quarter: egen rev_pc`c'_ijt = sum(rev_sel)
	gen rev_r_sel = revenue_r if cohort>`c'
	bysort firmgroup quarter: egen rev_r_pc`c'_ijt = sum(rev_r_sel)
	drop rev_sel rev_r_sel
}

* Total quantity and average unit price within the firm-group
bysort firmgroup quarter: egen quantity_ijt = sum(quantity)
gen price_ijt = rev_ijt/quantity_ijt

* Same, restricted to modules flagged diff_unit==0 in the units file
merge m:1 module using "INPUT/units_per_module_clean.dta", keep(master match) keepusing(nvals_unit diff_unit) nogenerate
bysort firmgroup quarter: egen tmp_rev_unit = sum(revenue) if diff_unit==0
bysort firmgroup quarter: egen rev_unit_ijt = mean(tmp_rev_unit)
bysort firmgroup quarter: egen tmp_qty_unit = sum(quantity) if diff_unit==0
bysort firmgroup quarter: egen quantity_unit_ijt = mean(tmp_qty_unit)
drop tmp_rev_unit tmp_qty_unit nvals_unit diff_unit
gen price_unit_ijt = rev_unit_ijt/quantity_unit_ijt

xtset product quarter

* Reduce to one row per firm-group x quarter and keep identifiers plus the _ijt aggregates
duplicates drop firmgroup firm group quarter *_ijt, force
keep  firmgroup firm group year qtr quarter t *_ijt
order firmgroup firm group year qtr quarter t *_ijt

// Defines cohort/entryTime of the firm-group.
// The panel unit of this dataset is firmgroup (firm x group), so first and
// last period are taken by firmgroup. Taking them by firm would mix units
// with totalobs_ij below, which is counted by firmgroup.
bysort firmgroup: egen xx=min(quarter)
qui su xx
gen lcensored_ij=(xx==`r(min)') 
label variable lcensored_ij "Firm already exists in 2006"
gen cohort_ij=xx 
label variable cohort_ij "First year we observe the firm, 2006 is censored"

// Defines ExitTime
bysort firmgroup: egen yy=max(quarter)
qui su yy
gen rcensored_ij=(yy==`r(max)') 
label variable rcensored_ij "Firm continues beyond 2015"
// The last row of a unit is the appended exit row, one period after the last
// observed period, hence the -1.
gen exitTime_ij=yy -1 //                                     
label variable exitTime_ij "Last year we observed the firm, 2015 is censored"

// Defines Age (missing on the exit row, which lies after exitTime_ij)
gen age_ijt=quarter-cohort_ij
replace age_ijt=. if quarter>exitTime_ij
label variable age_ijt "Age of the firm, for lcensored means time in dataset"
bysort firmgroup: egen maxAge_ij=max(age_ijt) 

// Type of observation 
// Variables are referenced by their full names (age_ijt, maxAge_ij, type_ijt);
// the product-level age/maxAge/type no longer exist in this dataset.
gen type_ijt="NA"
replace type_ijt="entry"  if age_ijt==0 & age_ijt!=maxAge_ij  
replace type_ijt="continues"  if age_ijt>0  & age_ijt<maxAge_ij  
replace type_ijt="exit"       if age_ijt==maxAge_ij & age_ijt!=0 
replace type_ijt="entry/exit" if age_ijt==maxAge_ij & age_ijt==0 
gen gamma_ijt=1      if type_ijt=="entry"
replace gamma_ijt=0  if type_ijt=="continues"
replace gamma_ijt=-1 if type_ijt=="exit"
// Number of periods with a positive cumulative product count, minus one,
// compared with the span between first and last period of the unit.
gen obs=1 if T2_ijt>0
bysort firmgroup: egen totalobs_ij=sum(obs)
replace totalobs_ij= totalobs_ij -1
gen flag_longitudinal_ij="NA"
replace flag_longitudinal_ij="Complete" if totalobs_ij==yy-xx
replace flag_longitudinal_ij="Incomplete" if totalobs_ij<yy-xx
drop xx yy obs 


// First sample period (2006 Q1): flow variables are not identified, blank them
su quarter
local q_first = r(min)
local q_last  = r(max)
foreach v of varlist C_ijt N*_ijt X*_ijt delta* revN* revX* qN* qX* {
	replace `v' = . if quarter==`q_first'
}

// Second sample period (2006 Q2): the smallest period strictly inside the sample range
qui su quarter if quarter>`q_first' & quarter<`q_last'
local q_second = r(min)
foreach v of varlist C_ijt N*_ijt X*_ijt delta* revN* revX* qN* qX* {
	replace `v' = . if quarter==`q_second'
}

// Period after the sample (2016 Q1): holds only the appended exit rows, drop it
su quarter
drop if quarter==r(max)

// Last sample period (2015 Q4): exit variables are blanked
su quarter
local q_final = r(max)
foreach v of varlist X*_ijt revX* qX* {
	replace `v' = . if quarter==`q_final'
}

save "INTERMEDIATE/firmXgroup_RMS_def10_sampleE_06_15.dta", replace


************************************************************************************************************************
**** 4. FIRM-level aggregation
********************************

* NOTE(review): this input file is not written by any code visible in this section - confirm where it is produced.
use "INTERMEDIATE/product_RMS_def10_firm_SampleE_06_15.dta", clear
drop if firm==.

* Number of distinct products, modules, groups and departments per firm-quarter
bysort firm quarter: egen products_it = nvals(product), miss
foreach lvl in module group department {
	bysort firm quarter: egen `lvl's_it = nvals(`lvl'), miss
}

* Indicators for more than one product / module / group / department
gen multi_it = (products_it>1)
foreach lvl in modules groups departments {
	gen multi`lvl'_it = (`lvl'_it>1)
}

* Birth period, age and maximum age of each module within the firm
by firm module, sort: egen birth_mod = min(quarter)
gen mod_age = quarter - birth_mod
by firm module, sort: egen max_Age_mod = max(mod_age)

* Birth period, age and maximum age of each group within the firm
by firm group, sort: egen birth_group = min(quarter)
gen group_age = quarter - birth_group
by firm group, sort: egen max_Age_group = max(group_age)

* Birth period, age and maximum age of each department within the firm
by firm department, sort: egen birth_dep = min(quarter)
gen dep_age = quarter - birth_dep
by firm department, sort: egen max_Age_dep = max(dep_age)

* Birth period, age and maximum age of the firm itself
by firm, sort: egen birth_firm = min(quarter)
gen firm_age = quarter - birth_firm
by firm, sort: egen max_Age_firm = max(firm_age)

* Entry and exit are classified by the highest level of the product hierarchy
* (module < group < department < firm) that is new / disappears with the product.
* The firm-quarter counts are referenced by their full names (modules_it,
* groups_it, departments_it) rather than by abbreviation.

* Entry A: new product in a module the firm already sells in
gen entry_A = 0
replace entry_A = 1 if type=="entry" & mod_age!=0

* Entry B: new module in old group
gen entry_B = 0
replace entry_B = 1 if type=="entry" & mod_age==0 & group_age!=0

* Entry C: New group in old department
gen entry_C = 0
replace entry_C = 1 if type=="entry" & group_age==0 & dep_age!=0

* Entry D: New department in old firm
gen entry_D = 0
replace entry_D = 1 if type=="entry" & dep_age==0 & firm_age!=0

* Entry E: new product of a new firm
gen entry_E = 0
replace entry_E = 1 if type=="entry" & firm_age==0

* Exit A: exit out of a module that survives:
gen exit_A = 0
replace exit_A=1 if type=="exit" & mod_age!=max_Age_mod

* Exit B: exit of a product unique to the module but not to the group (making sure that is was unique when it died)
gen exit_B=0
replace exit_B=1 if type=="exit" & maxAge!=0 & mod_age==max_Age_mod & modules_it==1 & group_age!=max_Age_group

* Exit C: unique to the group but not to the department:
gen exit_C = 0
replace exit_C=1 if type=="exit" & group_age==max_Age_group & groups_it==1 & dep_age!=max_Age_dep

* Exit D: exit in the last period of the department, when the firm has a single department
gen exit_D=0
replace exit_D=1 if type=="exit" & departments_it==1 & dep_age==max_Age_dep

* Exit E: exit product of an exit firm:
gen exit_E=0
replace exit_E = 1 if type=="exit" & firm_age==max_Age_firm

* Number of entering products per firm-quarter, overall and by entry type A-E
gen entry = (type=="entry")
bysort firm quarter: egen N_it = sum(entry)
foreach k in A B C D E {
	local s = lower("`k'")
	bysort firm quarter: egen N`s'_it = sum(entry_`k')
}

* Number of exiting products per firm-quarter, overall and by exit type A-E
gen exit = (type=="exit")
bysort firm quarter: egen X_it = sum(exit)
foreach k in A B C D E {
	local s = lower("`k'")
	bysort firm quarter: egen X`s'_it = sum(exit_`k')
}

* Number of continuing products (neither entering nor exiting)
gen continue = (exit==0 & entry==0)
bysort firm quarter: egen C_it = sum(continue)

* Total number of products as entrants plus continuers (T1)
gen T_it = N_it + C_it

* Change in the number of products, computed on one row per firm x quarter
* and merged back onto the product-level rows.
preserve

	tempfile last
	duplicates drop firm quarter, force
	sort firm quarter
	xtset firm quarter
	* Product count on the previous row; the first period of a firm
	* (firm_age==0) has no predecessor and is set to zero.
	gen total_product_1_last = T_it[_n-1]
	replace total_product_1_last = 0 if firm_age==0
	gen delta_T_it  = T_it - total_product_1_last
	gen delta_T2_it = N_it - X_it

	* Cumulative net entry within the firm
	bysort firm (quarter): gen T2_it = sum(delta_T2_it)
	drop *last

	save "`last'"
restore

sort firm quarter
merge m:1 firm quarter using "`last'", nogenerate

xtset product quarter

* Firm x quarter revenue: totals and per-product means, nominal and real
by firm quarter, sort: egen rev_it        = sum(revenue)
by firm quarter, sort: egen rev_r_it      = sum(revenue_r)
by firm quarter, sort: egen rev_mean_it   = mean(revenue)
by firm quarter, sort: egen rev_r_mean_it = mean(revenue_r)

* Revenue on the product's next row (next observed period)
sort product quarter
by product: gen revenue_next   = revenue[_n+1]
by product: gen revenue_r_next = revenue_r[_n+1]

* Revenue within the firm of entrants.
* Pattern used four times: mean over the entry==1 / entry==0 sub-cells, zero out
* the non-entrant rows, then take the cell maximum so that every row of the
* firm x quarter carries the entrants' mean (0 when there are none).
* The scratch variable is written out in full (total_revenue_entry_0), as in
* the entry-type blocks, so the code does not depend on name abbreviation.
bys firm quarter entry: egen total_revenue_entry_0 = mean(revenue)
replace total_revenue_entry_0=0 if entry==0
bysort firm quarter: egen revN_mean_it = max(total_revenue_entry_0)
drop total_revenue_entry_0
bys firm quarter entry: egen total_revenue_entry_0 = mean(revenue_r)
replace total_revenue_entry_0=0 if entry==0
bysort firm quarter: egen revN_r_mean_it = max(total_revenue_entry_0)
drop total_revenue_entry_0
xtset product quarter
* Same, using revenue on the next row (suffix F)
bys firm quarter entry: egen total_revenue_entry_0 = mean(revenue_next)
replace total_revenue_entry_0=0 if entry==0
bysort firm quarter: egen revN_meanF_it = max(total_revenue_entry_0)
drop total_revenue_entry_0
bys firm quarter entry: egen total_revenue_entry_0 = mean(revenue_r_next)
replace total_revenue_entry_0=0 if entry==0
bysort firm quarter: egen revN_r_meanF_it = max(total_revenue_entry_0)
drop total_revenue_entry_0

* Revenue within the firm of each entry type (entry_A ... entry_E).
* For type K and each source variable: mean over the entry_K sub-cell, zero for
* rows that are not entry_K, then the firm-quarter maximum spreads the mean to
* every row. Outputs are revN<k>_mean_it, revN<k>_r_mean_it (current nominal /
* real revenue) and revN<k>_meanF_it, revN<k>_r_meanF_it (revenue on the next row).
local entry_src revenue revenue_r revenue_next revenue_r_next
local entry_sfx mean r_mean meanF r_meanF
foreach k in A B C D E {
	local s = lower("`k'")
	forvalues j = 1/4 {
		local src : word `j' of `entry_src'
		local sfx : word `j' of `entry_sfx'
		bysort firm quarter entry_`k': egen total_revenue_entry_0 = mean(`src')
		replace total_revenue_entry_0 = 0 if entry_`k'==0
		bysort firm quarter: egen revN`s'_`sfx'_it = max(total_revenue_entry_0)
		drop total_revenue_entry_0
	}
}

* Revenue within the firm of exits.
* Exit rows are valued with the product's revenue one and two rows back.
sort product quarter
by product: gen revenue_last   = revenue[_n-1]
by product: gen revenue_r_last = revenue_r[_n-1]
by product: gen revenue_2ago   = revenue[_n-2]
by product: gen revenue_r_2ago = revenue_r[_n-2]

* For each source variable: mean over the exit sub-cell, zero for non-exit rows,
* then the firm-quarter maximum spreads the exiters' mean to every row.
local exit_src revenue_last revenue_r_last revenue_2ago revenue_r_2ago
local exit_out revX_mean_it revX_r_mean_it revX_meanL_it revX_r_meanL_it
forvalues j = 1/4 {
	local src : word `j' of `exit_src'
	local out : word `j' of `exit_out'
	bysort firm quarter exit: egen total_revenue_exit_0 = mean(`src')
	replace total_revenue_exit_0 = 0 if exit==0
	bysort firm quarter: egen `out' = max(total_revenue_exit_0)
	drop total_revenue_exit_0
}

* Revenue within the firm of each exit type (exit_A ... exit_E).
* For type K and each source variable: mean over the exit_K sub-cell, zero for
* rows that are not exit_K, then the firm-quarter maximum spreads the mean to
* every row. Outputs are revX<k>_mean_it, revX<k>_r_mean_it (revenue one row
* back) and revX<k>_meanL_it, revX<k>_r_meanL_it (revenue two rows back).
local exit_src revenue_last revenue_r_last revenue_2ago revenue_r_2ago
local exit_sfx mean r_mean meanL r_meanL
foreach k in A B C D E {
	local s = lower("`k'")
	forvalues j = 1/4 {
		local src : word `j' of `exit_src'
		local sfx : word `j' of `exit_sfx'
		bysort firm quarter exit_`k': egen total_revenue_exit_0 = mean(`src')
		replace total_revenue_exit_0 = 0 if exit_`k'==0
		bysort firm quarter: egen revX`s'_`sfx'_it = max(total_revenue_exit_0)
		drop total_revenue_exit_0
	}
}

* Product quality within the firm-quarter, all products:
*   q_it      revenue-weighted average of quality0 (rows with missing quality0 excluded)
*   q_mean_it unweighted mean of quality0
*   q_p50_it  median of quality0
bysort firm quarter: egen temp_num = sum(revenue*quality0) if quality0 != .
bysort firm quarter: egen temp_den = sum(revenue)          if quality0 != .
generate temp_wavg = temp_num/temp_den
* temp_wavg is missing on rows with missing quality0; the mean below spreads the
* firm-quarter value to those rows as well.
bysort firm quarter: egen q_it      = mean(temp_wavg)
bysort firm quarter: egen q_mean_it = mean(quality0)
bysort firm quarter: egen q_p50_it  = median(quality0)
drop temp*

* Quality of entering products within the firm-quarter, computed for the overall
* entry flag and for each alternative definition entry_A ... entry_E.
*   qN<sfx>_it       revenue-weighted average of quality0 over rows with flag==1
*   qN<sfx>_mean_it  unweighted mean of quality0 over rows with flag==1
* <sfx> is empty for entry and a, b, c, d, e for entry_A ... entry_E.
* Both outputs are spread to every row of the firm-quarter and are missing when
* the firm-quarter has no flagged row with non-missing quality0.
foreach qn_flag in entry entry_A entry_B entry_C entry_D entry_E {
    // 7th character of the flag name: "" for entry, the definition letter otherwise
    local qn_sfx = lower(substr("`qn_flag'", 7, 1))

    bysort firm quarter: egen temp_num = sum(revenue*quality0) if quality0 != . & `qn_flag' == 1
    bysort firm quarter: egen temp_den = sum(revenue)          if quality0 != . & `qn_flag' == 1
    generate temp_wavg = temp_num/temp_den
    bysort firm quarter: egen qN`qn_sfx'_it = mean(temp_wavg)

    bysort firm quarter: egen temp_avg = mean(quality0) if quality0 != . & `qn_flag' == 1
    bysort firm quarter: egen qN`qn_sfx'_mean_it = mean(temp_avg)

    drop temp*
}

* Quality carried by the product's previous row (rows ordered by quarter within product);
* missing on the first row of each product.
sort product quarter
by product: generate quality0_last = quality0[_n-1]

* Quality of exiting products within the firm-quarter, computed for the overall
* exit flag and for each alternative definition exit_A ... exit_E.
*   qX<sfx>_it       average of quality0_last weighted by revenue_r_last, rows with flag==1
*   qX<sfx>_mean_it  unweighted mean of quality0_last, rows with flag==1
* <sfx> is empty for exit and a, b, c, d, e for exit_A ... exit_E.
foreach qx_flag in exit exit_A exit_B exit_C exit_D exit_E {
    // 6th character of the flag name: "" for exit, the definition letter otherwise
    local qx_sfx = lower(substr("`qx_flag'", 6, 1))

    bysort firm quarter: egen temp_num = sum(revenue_r_last*quality0_last) if quality0_last != . & `qx_flag' == 1
    bysort firm quarter: egen temp_den = sum(revenue_r_last)               if quality0_last != . & `qx_flag' == 1
    generate temp_wavg = temp_num/temp_den
    bysort firm quarter: egen qX`qx_sfx'_it = mean(temp_wavg)

    bysort firm quarter: egen temp_avg = mean(quality0_last) if quality0_last != . & `qx_flag' == 1
    bysort firm quarter: egen qX`qx_sfx'_mean_it = mean(temp_avg)

    drop temp*
}

* Reduce the data to firm-level variables.
* flagHMSonly receives the _i suffix first so that the *_i pattern below matches it.
rename flagHMSonly flagHMSonly_i
local fq_vars *_it *_i
* Rows that agree on firm, quarter and every *_it / *_i variable are collapsed to one.
* NOTE(review): this leaves one row per firm-quarter only if all of those variables are
* constant within firm-quarter -- not checked here.
duplicates drop firm quarter `fq_vars', force
keep  firm year qtr quarter t `fq_vars'
order firm year qtr quarter t `fq_vars'

// First and last quarter index in which each firm appears
bysort firm: egen xx = min(quarter)
bysort firm: egen yy = max(quarter)

// Cohort / entry time: a firm whose first quarter is the earliest first quarter
// in the data is flagged as left censored
quietly summarize xx
local fc_lo = r(min)
generate lcensored_i = (xx == `fc_lo')
label variable lcensored_i "Firm already exists in 2006"
generate cohort_i = xx
label variable cohort_i "First year we observe the firm, 2006 is censored"

// Exit time: a firm whose last quarter is the latest last quarter in the data
// is flagged as right censored
quietly summarize yy
local fc_hi = r(max)
generate rcensored_i = (yy == `fc_hi')
label variable rcensored_i "Firm continues beyond 2015"
// exit time is one quarter before the firm's last row (differs from the product-level definition)
generate exitTime_i = yy - 1
label variable exitTime_i "Last year we observed the firm, 2015 is censored"

// Firm age: quarters elapsed since cohort_i, left missing on rows after exitTime_i
generate age_it = cond(quarter > exitTime_i, ., quarter - cohort_i)
label variable age_it "Age of the firm, for lcensored means time in dataset"
// Largest non-missing age reached by the firm
bysort firm: egen maxAge_i = max(age_it)

// Type of observation (firm level)
// The firm-level variables are age_it, maxAge_i and type_it. The product-level
// age, maxAge and type were removed by the earlier keep, so the full names are
// spelled out here instead of relying on Stata's variable-name abbreviation
// (which fails under "set varabbrev off" or when another variable shares the prefix).
//   entry       first row of the firm (age 0), firm observed longer than that
//   continues   age strictly between 0 and the firm's maximum age
//   exit        row at the firm's maximum age, when that is not age 0
//   entry/exit  age 0 is also the maximum age
// NOTE(review): rows with missing age_it stay "NA", except when maxAge_i is also
// missing (missing==missing is true in Stata), in which case they are tagged "exit" --
// confirm this is intended for firms whose age_it is missing on every row.
gen type_it="NA"
replace type_it="entry"      if age_it==0 & age_it!=maxAge_i
replace type_it="continues"  if age_it>0  & age_it<maxAge_i
replace type_it="exit"       if age_it==maxAge_i & age_it!=0
replace type_it="entry/exit" if age_it==maxAge_i & age_it==0

// Numeric entry/exit indicator: 1 entry, 0 continues, -1 exit; missing for "entry/exit" and "NA"
gen gamma_it=1      if type_it=="entry"
replace gamma_it=0  if type_it=="continues"
replace gamma_it=-1 if type_it=="exit"

// Longitudinal completeness of the firm's history
// totalobs_i counts the firm's rows with T2_it>0.
// NOTE(review): a missing T2_it also satisfies T2_it>0 in Stata -- confirm T2_it is never missing.
tempvar lg_span
bysort firm: egen totalobs_i = sum(T2_it > 0)
generate `lg_span' = yy - xx

// "Complete" when the count equals last minus first quarter, "Incomplete" when it is
// smaller, "NA" otherwise
generate flag_longitudinal_i = "NA"
replace flag_longitudinal_i = "Complete"   if totalobs_i == `lg_span'
replace flag_longitudinal_i = "Incomplete" if totalobs_i <  `lg_span'
drop xx yy `lg_span'

// the stored count is reduced by one after the flag has been set
replace totalobs_i = totalobs_i - 1

// Scope dummies built from the multi-product / multi-module / multi-group /
// multi-department indicators. Each condition adds one level to the previous one.
local sc_prod `"multi_it == 1"'
local sc_mod  `"`sc_prod' & multimodules_it == 1"'
local sc_grp  `"`sc_mod' & multigroups_it == 1"'

generate if_single_product_it    = (multi_it == 0)
generate if_single_module_it     = (`sc_prod' & multimodules_it == 0)
generate if_single_group_it      = (`sc_mod' & multigroups_it == 0)
generate if_single_department_it = (`sc_grp' & multidepartments_it == 0)
generate if_multi_department_it  = (`sc_grp' & multidepartments_it == 1)
generate if_multi_product_it     = (`sc_prod')

// Blank the entry/exit-based measures in the edge quarters, then save.
// Quarter bounds are stored in locals right after each summarize instead of
// reading r() again after the loops.
local eq_flows C_it N*_it X*_it delta* revN* revX* qN* qX*

// 2006 Q1: first quarter in the data
summarize quarter
local eq_first = r(min)
local eq_last  = r(max)
foreach eq_v of varlist `eq_flows' {
    replace `eq_v' = . if quarter == `eq_first'
}

// 2006 Q2: smallest quarter strictly between the first and the last
quietly summarize quarter if quarter > `eq_first' & quarter < `eq_last'
local eq_second = r(min)
foreach eq_v of varlist `eq_flows' {
    replace `eq_v' = . if quarter == `eq_second'
}

// 2016 Q1: the last quarter is removed from the data
summarize quarter
local eq_drop = r(max)
drop if quarter == `eq_drop'

// 2015 Q4: exit measures are blanked in what is now the last quarter
summarize quarter
local eq_newlast = r(max)
foreach eq_v of varlist X*_it revX* qX* {
    replace `eq_v' = . if quarter == `eq_newlast'
}

save "INTERMEDIATE/firm_RMS_def10_sampleE_06_15.dta", replace


